Read in SUBVAR dataset
FULL_MUTATIONS_DT = readRDS("SUBVARIANTS_FULL_MUTATIONS_DT.rds")
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Predicted_Binding, BA_Rank,`BA-score`, Binder, MT_Binder, MT_BA_Rank,`MT_BA-score`,Variant)%>%
separate_rows_(cols = c("Predicted_Binding", "BA_Rank","BA-score", "Binder", "MT_Binder", "MT_BA_Rank","MT_BA-score"),sep=",") %>%
mutate(BA_Rank = as.numeric(BA_Rank)) %>% mutate(MT_BA_Rank = as.numeric(MT_BA_Rank))%>%mutate(`BA-score` = as.numeric(`BA-score`)) %>% mutate(`MT_BA-score` = as.numeric(`MT_BA-score`))
#define Min-Max normalization function
min_max_norm <- function(x) {
(- 1 * ((x - min(x)) / (max(x) - min(x))))+1
}
#negative logistic function
neg_logit_function = function(x) {
1/(1+exp(1.5*(x-2)))
}
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate(MHCScore = neg_logit_function(BA_Rank))%>% mutate(MT_MHCScore = neg_logit_function(MT_BA_Rank))
#FULL_MUTATIONS_DT %>% mutate(MHCScore = min_max_norm(BA_Rank))%>% mutate(MT_MHCScore = min_max_norm(MT_BA_Rank))
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT%>% filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER"))
FULL_MUTATIONS_DT %>% head()
## # A tibble: 6 × 12
## Peptide VariantAlignment Predicted_Binding BA_Rank `BA-score` Binder MT_Binder
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 AEHVNNSY AEYVNNSY HLA-B1801 0.507 0.371 BINDER BINDER
## 2 AEHVNNSY AEYVNNSY HLA-B4402 0.674 0.358 BINDER BINDER
## 3 AEHVNNSY AEYVNNSY HLA-B4403 0.638 0.360 BINDER BINDER
## 4 APGQTGKIA APGQTGNIA HLA-B0702 0.846 0.424 BINDER BINDER
## 5 APGQTGKIA APGQTGNIA HLA-B4201 1.76 0.375 BINDER BINDER
## 6 APRITFGGP ALRITFGGP HLA-B0702 0.898 0.413 BINDER NONBINDER
## # … with 5 more variables: MT_BA_Rank <dbl>, MT_BA-score <dbl>, Variant <chr>,
## # MHCScore <dbl>, MT_MHCScore <dbl>
FullDataset=fread("TRAPP/DATA_V3_SARS2_ONLY/pred_scores/prott5_xl_bfd_peptides_sars2_mut_test_prediction_withlabel_2.txt")%>% distinct()
FullDataset=FullDataset %>% filter(! Dataset %in% 'Mutagenesis')
INPUT_DATA_TRAPP=fread("MUTAGENESIS_SUBSET_OMI_WUHAN_HLA_RUN_CHL_DNN_V4_INC_SUBVAR_WTNB_MTBINDER_ALLCOV2_MUTAGENESIS.txt")%>% distinct()%>%
mutate(BA_Rank = as.numeric(BA_Rank))%>% mutate(nlog2Rank = - log2(BA_Rank))
INPUT_DATA_TRAPP=INPUT_DATA_TRAPP %>% mutate(nlog2Rank = round(nlog2Rank, digits=4))
FullDataset=FullDataset%>% mutate(nlog2Rank = round(nlog2Rank, digits=4))
FullDataset=FullDataset %>% inner_join(INPUT_DATA_TRAPP)
## Joining, by = c("Peptide", "nlog2Rank", "Dataset", "Predicted_Binding")
FullDataset %>% select(Dataset)%>% table
## .
## OmicronTest SUBVAR_MT_TEST SUBVAR_WT_TEST WuhanTest
## 431 324 324 477
FullDataset=FullDataset%>%dplyr::rename(Prediction=prediction)
FullDataset=FullDataset %>% select(Peptide, Dataset, Predicted_Binding, Prediction)
FullDataset %>% head
## Peptide Dataset Predicted_Binding Prediction
## 1: AKSHNIALIW WuhanTest HLA-A3201 0.9445666
## 2: AKSHNIALIW WuhanTest HLA-B5701 0.9412294
## 3: AKSHNIALIW WuhanTest HLA-B5801 0.9411128
## 4: AKSHNIALIW WuhanTest HLA-B5802 0.9403259
## 5: APGQTGKIA WuhanTest HLA-B0702 0.9335080
## 6: APGQTGKIA WuhanTest HLA-B4201 0.9183891
FullDataset=FullDataset %>% select(! Dataset) %>% distinct()
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate_if(is.numeric, round, 4)
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate(Length = nchar(Peptide)) %>% filter(Length %in% c(9,10))
# Only NB-->B. Makes sense as i'm joining WT.
FULL_MUTATIONS_DT %>% anti_join(FullDataset)%>% mutate(GROUP = paste0(Binder, "_",MT_Binder))%>% select(GROUP) %>% table
## Joining, by = c("Peptide", "Predicted_Binding")
## .
## BINDER_BINDER BINDER_NONBINDER NONBINDER_BINDER
## 39 24 231
# Simply impute
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate_if(is.numeric, round, 4)
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% left_join(FullDataset)
## Joining, by = c("Peptide", "Predicted_Binding")
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% left_join(FullDataset %>% dplyr::rename(VariantAlignment = Peptide, OmicronPrediction=Prediction))
## Joining, by = c("VariantAlignment", "Predicted_Binding")
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate_if(is.numeric, round, 4)
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% dplyr::rename(TRAPP_Prediction=Prediction, TRAPP_Omicron = OmicronPrediction) %>% replace_na(list(TRAPP_Prediction = 0.01, TRAPP_Omicron=0.01))
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate_if(is.numeric, round, 4)
#FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate(Prediction = TRAPP_Prediction * `BA-score`)%>% mutate(OmicronPrediction = TRAPP_Omicron* `MT_BA-score`)
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate(Prediction = TRAPP_Prediction * MHCScore)%>% mutate(OmicronPrediction = TRAPP_Omicron* MT_MHCScore)
#FULL_MUTATIONS_DT %>% mutate(MHCScore = min_max_norm(BA_Rank))%>% mutate(MT_MHCScore = min_max_norm(MT_BA_Rank))
#FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% filter(!(Binder == "BINDER" & MT_Binder == "BINDER" & Prediction < 0.75 & OmicronPrediction < 0.75))
#colnms = colnames(FULL_MUTATIONS_DT)
#NA_SET = FULL_MUTATIONS_DT %>%
# filter_at(vars(all_of(colnms)), any_vars(is.na(.)))
#B_NB=NA_SET %>% filter((Binder == "BINDER" & MT_Binder == "NONBINDER") ) %>% select(VariantAlignment,Predicted_Binding,MT_BA_Rank) %>% dplyr::rename(Peptide=VariantAlignment,BA_Rank=MT_BA_Rank)
#NB_B=NA_SET %>% filter((Binder == "NONBINDER" & MT_Binder == "BINDER") ) %>% select(Peptide,Predicted_Binding,BA_Rank)
#B_NB %>% rbind(NB_B)%>% readr::write_csv(file="NONBINDERS_FOR_TRAPP_BA1.csv")
#FULL_MUTATIONS_DT = FULL_MUTATIONS_DT%>% anti_join(NA_SET)
#NONBINDERS_TRAPP_SCORES=fread("TRAPP/DATA_V3_SARS2_ONLY/pred_scores/prott5_xl_bfd_nonbinder_sars2_mut_test_prediction_withlabel.txt") %>% filter(Dataset == "BA1")
#NONBINDERS_TRAPP_SCORES=NONBINDERS_TRAPP_SCORES %>% mutate(nlog2Rank = round(nlog2Rank, digits=4))
#B_NB=B_NB%>% distinct()%>%
# mutate(BA_Rank = as.numeric(BA_Rank))%>% mutate(nlog2Rank = - log2(BA_Rank)) %>% mutate(nlog2Rank = round(nlog2Rank, digits=4))
#B_NB %>% left_join(NONBINDERS_TRAPP_SCORES)
# Omicron: as a nonbinder, it is BELOW the immunogenic threshold, proportional to the MUTANT rank score
# Wuhan : as a non binder, it is BELOW the immunogenic threshold, proportional to the WT Rank score.
#NA_SET_BINDER_NONBINDER = NA_SET %>% filter((Binder == "BINDER" & MT_Binder == "NONBINDER") ) %>% mutate(OmicronPrediction = 0.75 - log(MT_BA_Rank,50000))
#NA_SET_NONBINDER_BINDER = NA_SET %>% filter((Binder == "NONBINDER" & MT_Binder == "BINDER") )%>% mutate(Prediction= 0.75 - log(BA_Rank,50000))
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate_if(is.numeric, round, 4)
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate(Prediction = replace(Prediction, Prediction<0.00001, 0.00001))%>%
mutate(OmicronPrediction = replace(OmicronPrediction, OmicronPrediction<0.00001, 0.00001))
#FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% rbind(NA_SET_BINDER_NONBINDER)%>% rbind(NA_SET_NONBINDER_BINDER)
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% filter(!(Prediction < 0.5 & OmicronPrediction < 0.5))
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate(Prediction = round(Prediction, digits=4))%>% mutate(OmicronPrediction = round(OmicronPrediction, digits=4))%>% distinct()
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate(Prediction = replace(Prediction, Prediction==0.0, 0.00001))%>%
mutate(OmicronPrediction = replace(OmicronPrediction, OmicronPrediction==0.0, 0.00001))
FULL_MUTATIONS_DT=FULL_MUTATIONS_DT %>% mutate_if(is.numeric, round, 4)
FULL_MUTATIONS_DT %>% filter(Peptide %in% grep("PRRAR", Peptide, value=T))%>% DT::datatable()
#FULL_MUTATIONS_DT %>% dplyr::rename("Variant Peptide"=VariantAlignment, "HLA Allele"=Predicted_Binding,WuhanPrediction=Prediction,#TRAPP_Wuhan=TRAPP_Prediction) %>%
# select(! c(`BA-score`,`MT_BA-score`))%>%
# readr::write_csv(file="/Users/paulbuckley/Nexus365/WIMM CCB - Koohy Group - Documents/Koohy #Group/Effect_Mutation_Tcells/V9/WORKING_VERSION/Supplementary Tables/Table2_Subvar_Tcellimmuno.csv")
my_comparisons = list(c("Wuhan","Omicron"))
VIOLIN_PLT=FULL_MUTATIONS_DT %>% select(Peptide, Variant, Predicted_Binding, Prediction, OmicronPrediction)%>%dplyr::rename(Wuhan=Prediction, Omicron=OmicronPrediction)%>%
melt%>% dplyr::rename(Dataset=variable, Prediction=value)%>%
ggviolin(x="Dataset",y="Prediction",add="boxplot",trim=TRUE)+stat_compare_means(comparisons = my_comparisons,label="p.signif")+geom_hline(yintercept = 0.75, linetype="dashed")+facet_grid(~Variant)+ylim(0,1)
## Warning in melt(.): The melt generic in data.table has been passed a tbl_df
## and will attempt to redirect to the relevant reshape2 method; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. To
## continue using melt methods from reshape2 while both libraries are attached,
## e.g. melt.list, you can prepend the namespace like reshape2::melt(.). In the
## next version, this warning will become an error.
## Using Peptide, Variant, Predicted_Binding as id variables
VIOLIN_PLT

saveRDS(VIOLIN_PLT,file="SUBVAR_WUHAN_OMI_VIOLIN_PLT.rds")
FULL_MUTATIONS_DT %>% select(Peptide, Variant, Predicted_Binding, Prediction, OmicronPrediction)%>%dplyr::rename(Wuhan=Prediction, Omicron=OmicronPrediction)%>%
melt%>% dplyr::rename(Dataset=variable, Prediction=value)%>%
ggboxplot(x="Dataset",y="Prediction")+stat_compare_means(comparisons = my_comparisons)+geom_hline(yintercept = 0.75, linetype="dashed")+facet_grid(~Variant)
## Warning in melt(.): The melt generic in data.table has been passed a tbl_df
## and will attempt to redirect to the relevant reshape2 method; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. To
## continue using melt methods from reshape2 while both libraries are attached,
## e.g. melt.list, you can prepend the namespace like reshape2::melt(.). In the
## next version, this warning will become an error.
## Using Peptide, Variant, Predicted_Binding as id variables

FULL_MUTATIONS_DT %>% select(Peptide, Variant, Predicted_Binding, Prediction, OmicronPrediction)%>%dplyr::rename(Wuhan=Prediction, Omicron=OmicronPrediction)%>%
melt%>% dplyr::rename(Dataset=variable, Prediction=value) %>%
ggdensity(x="Prediction",y="..density..",fill="Dataset")+geom_vline(xintercept = 0.75, linetype="dashed")+facet_grid(~Variant)
## Warning in melt(.): The melt generic in data.table has been passed a tbl_df
## and will attempt to redirect to the relevant reshape2 method; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. To
## continue using melt methods from reshape2 while both libraries are attached,
## e.g. melt.list, you can prepend the namespace like reshape2::melt(.). In the
## next version, this warning will become an error.
## Using Peptide, Variant, Predicted_Binding as id variables

ECDF_PLT=FULL_MUTATIONS_DT %>% select(Peptide, Variant, Predicted_Binding, Prediction, OmicronPrediction)%>%dplyr::rename(Wuhan=Prediction, Omicron=OmicronPrediction)%>%
melt%>% dplyr::rename(Dataset=variable, Prediction=value) %>%
ggplot(aes(x=Prediction, color=Dataset))+stat_ecdf(size=1)+theme_pubr(base_size = 16)+grids()+geom_vline(xintercept = 0.75,linetype="dashed")+facet_grid(~Variant)
## Warning in melt(.): The melt generic in data.table has been passed a tbl_df
## and will attempt to redirect to the relevant reshape2 method; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. To
## continue using melt methods from reshape2 while both libraries are attached,
## e.g. melt.list, you can prepend the namespace like reshape2::melt(.). In the
## next version, this warning will become an error.
## Using Peptide, Variant, Predicted_Binding as id variables
ECDF_PLT

saveRDS(ECDF_PLT,file="SUBVAR_ECDF_PLT_IMMUNO.rds")
AB_SUPERTYPES=fread("HLA_AB_SUPERTYPES.csv") %>% mutate(Allele = gsub("\\*","",Allele))%>% mutate(Allele = paste0("HLA-",Allele))%>% dplyr::rename(HLA_Allele = Allele)
DATA_FOR_ANALYSIS=FULL_MUTATIONS_DT
HLA_A_B_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES)
## Joining, by = "HLA_Allele"
HLA_CDATASET_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(!HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(Supertype = stringr::str_extract(HLA_Allele,"HLA-C[0-9][0-9]"))%>%
mutate(Supertype = gsub("HLA\\-","",Supertype))
VARIANT = "BA2"
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% filter(Variant == VARIANT) %>% dplyr::rename(Wuhan=Prediction, Omicron=OmicronPrediction) %>%
ggpaired(cond1 = "Wuhan", cond2 = "Omicron",line.color = "gray")+facet_wrap(~Supertype,ncol=9)+theme_pubr(base_size = 16)+stat_compare_means(label = "p.signif",label.x.npc = "center")+rotate_x_text(angle=45)+ylab("Immunogenicity Score")+ggtitle(VARIANT)

VARIANT = "BA4"
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% filter(Variant == VARIANT) %>% dplyr::rename(Wuhan=Prediction, Omicron=OmicronPrediction) %>%
ggpaired(cond1 = "Wuhan", cond2 = "Omicron",line.color = "gray")+facet_wrap(~Supertype,ncol=9)+theme_pubr(base_size = 16)+stat_compare_means(label = "p.signif",label.x.npc = "center")+rotate_x_text(angle=45)+ylab("Immunogenicity Score")+ggtitle(VARIANT)

VARIANT = "BA5"
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% filter(Variant == VARIANT)%>% dplyr::rename(Wuhan=Prediction, Omicron=OmicronPrediction) %>%
ggpaired(cond1 = "Wuhan", cond2 = "Omicron",line.color = "gray")+facet_wrap(~Supertype,ncol=9)+theme_pubr(base_size = 16)+stat_compare_means(label = "p.signif",label.x.npc = "center")+rotate_x_text(angle=45)+ylab("Immunogenicity Score")+ggtitle(VARIANT)

HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% select(!Variant)%>% distinct()%>% dplyr::rename(Wuhan=Prediction, Omicron=OmicronPrediction) %>%
ggpaired(cond1 = "Wuhan", cond2 = "Omicron",line.color = "gray")+facet_wrap(~Supertype,ncol=9)+theme_pubr(base_size = 16)+stat_compare_means(label = "p.signif",label.x.npc = "center")+rotate_x_text(angle=45)+ylab("Immunogenicity Score")+ggtitle("ALL")
